{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import load_boston" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "data = load_boston()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_keys(['data', 'target', 'feature_names', 'DESCR'])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.keys()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,\n", " 4.9800e+00],\n", " [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,\n", " 9.1400e+00],\n", " [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,\n", " 4.0300e+00],\n", " ...,\n", " [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n", " 5.6400e+00],\n", " [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,\n", " 6.4800e+00],\n", " [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,\n", " 7.8800e+00]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data['data']" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(404, 13)\n", "(102, 13)\n", "(404,)\n", "(102,)\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "train_data, test_data, train_target, test_target = train_test_split(data['data'], data['target'], test_size=0.2)\n", "\n", "print(train_data.shape)\n", "print(test_data.shape)\n", "print(train_target.shape)\n", "print(test_target.shape)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "\n", "model = 
MinMaxScaler().fit(train_data)\n", "train_data_mms = model.transform(train_data)\n", "test_data_mms = model.transform(test_data)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-1.76956027e-04, 0.00000000e+00, -1.68621701e-02, 0.00000000e+00,\n", " -7.92181070e-01, -6.82314620e-01, -2.98661174e-02, -1.02719857e-01,\n", " -4.34782609e-02, -3.56870229e-01, -1.34042553e+00, -8.06898986e-04,\n", " -4.77373068e-02])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.min_" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "from sklearn.decomposition import PCA\n", "\n", "# Keep the PCA estimator and its projections under their own names: the\n", "# min-max-scaled arrays stay intact, so re-running this cell is idempotent\n", "# instead of applying PCA to data that was already projected.\n", "pca = PCA(n_components=8).fit(train_data_mms)\n", "train_data_pca = pca.transform(train_data_mms)\n", "test_data_pca = pca.transform(test_data_mms)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import load_iris\n", "from sklearn.cluster import KMeans\n", "\n", "data = load_iris()\n", "model = KMeans(n_clusters=3).fit(data['data'])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2,\n", " 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2,\n", " 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1])" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.labels_" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[5.006 , 3.418 , 1.464 
, 0.244 ],\n", " [5.9016129 , 2.7483871 , 4.39354839, 1.43387097],\n", " [6.85 , 3.07368421, 5.74210526, 2.07105263]])" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.cluster_centers_" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": true }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "# One scatter call per cluster label so each cluster gets its own colour;\n", "# only the first two feature columns are plotted.\n", "for i in range(3):\n", "    plt.scatter(data['data'][model.labels_ == i, 0], data['data'][model.labels_ == i, 1])\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5525919445213676" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.metrics import silhouette_score\n", "\n", "silhouette_score(data['data'], model.labels_)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Load the data\n", "from sklearn.datasets import load_breast_cancer\n", "data = load_breast_cancer()\n", "x = data['data']\n", "y = data['target']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Split into training and test sets\n", "from sklearn.model_selection import train_test_split\n", "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Preprocessing: fit the scaler on the training split only, then apply it to both splits\n", "from sklearn.preprocessing import StandardScaler\n", "model = StandardScaler().fit(x_train)\n", "x_train_ss = model.transform(x_train)\n", "x_test_ss = model.transform(x_test)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# Build the classification model\n", "from sklearn.svm import SVC\n", "model = SVC().fit(x_train_ss, y_train)\n", "y_pre = model.predict(x_test_ss)" ] }, { "cell_type": 
"code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9736842105263158" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.score(x_test_ss, y_test)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n", "0.9565217391304348\n", "0.9777777777777777\n" ] } ], "source": [ "from sklearn.metrics import recall_score, precision_score, f1_score, roc_curve\n", "print(recall_score(y_test, y_pre))\n", "print(precision_score(y_test, y_pre))\n", "print(f1_score(y_test, y_pre))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAEwJJREFUeJzt3W+MXFd9xvHn2V2vE/+LY++agp1gFzmAq5KKLAFVCEJRi52+sJB4kYCIGoEsqwTxMlGlwgveFKFKCBFiWVEU8QarKhGYypBWqiCV0rTZSCGJiYy2piQOVDM2ITtrZ2c9u7++mPF6stk/1+u7M3vP+X4kS3vvnOz8Do6eHM695xxHhAAAaRnodwEAgPIR7gCQIMIdABJEuANAggh3AEgQ4Q4ACSLcASBBhDsAJIhwB4AEDfXri0dGRmLv3r39+noAqKTnnnvufESMrtSub+G+d+9ejY+P9+vrAaCSbP+mSDumZQAgQYQ7ACSIcAeABBHuAJAgwh0AErRiuNt+zHbN9ktLfG7b37Y9YfsF2x8sv0wAwLUoMnJ/XNLBZT4/JGl/588RSY9cf1kAgOux4nvuEfGU7b3LNDks6XvRPq/vGdvbbb8zIn5XUo1r5mKzpcef/l81L8/2uxQAGRnbu0Mfu23FdUjXpYxFTLslvdp1fa5z723hbvuI2qN73XrrrSV89fX52Zm6vvnkGUmS3ediAGTj6MffU4lwLywijks6LkljY2N9P5n7/yanJUnPf/UvtX3TcJ+rAYDylPG2zGuSbum63tO5t+7VG00NDw7ophs39LsUAChVGeF+UtJ9nbdmPiLpjSrMt0tSrTGt0a0bZeZkACRmxWkZ29+XdJekEdvnJH1N0gZJiohjkk5JulvShKRLku5fq2LLVm80Nbp1Y7/LAIDSFXlb5t4VPg9JXyqtoh6qN5q6ZcemfpcBAKXLeoVqrdHULkbuABKUbbhfnp3T7y/OMC0DIEnZhvv5qaYkadfWG/pcCQCUL9twr01eCXdG7gDSk2241xvtcGdaBkCKsg33Wifcd20j3AGkJ+Nwb289MLKFcAeQnmzDvd5oasfmYW0YzPZ/AgAJyzbZeMcdQMqyDXe2HgCQMsIdABKUZbhHhOqNJguYACQry3B/483LmpmdY+QOIFlZhvv8O+6EO4BE5RnubD0AIHFZhnt9qr2AiWkZAKnKMtznR+7beKAKIE15hnujqU3Dg9qyccWDqACgkrIMd95xB5C6LMO91pjmYSqApGUZ7ixgApC6LMO9xrQMgMRlF+7Tl2fVmG4R7gCSll24c7wegBxkF+5XTmDigSqAl
OUX7vNbD/BAFUC6sgv3+hTTMgDSl1241yabGhywdm4e7ncpALBmsgv3eqOpkS3DGhhwv0sBgDWTXbjXGtNMyQBIXqFwt33Q9hnbE7YfWuTzm2z/2PYvbJ+2fX/5pZajxupUABlYMdxtD0p6WNIhSQck3Wv7wIJmX5L0y4i4XdJdkv7R9rqc1K43mhrdwsgdQNqKjNzvlDQREWcjYkbSCUmHF7QJSVttW9IWSb+X1Cq10hLMzoXOTzW1axvhDiBtRcJ9t6RXu67Pde51+46k90v6raQXJX0lIuZKqbBEFy42NRcsYAKQvrIeqH5K0vOS3iXpzyR9x/a2hY1sH7E9bnu8Xq+X9NXFsfUAgFwUCffXJN3Sdb2nc6/b/ZKeiLYJSb+W9L6FvygijkfEWESMjY6OrrbmVavNhzsPVAGkrUi4Pytpv+19nYek90g6uaDNK5I+KUm23yHpvZLOllloGerzWw8wcgeQthUPEY2Ilu0HJD0paVDSYxFx2vbRzufHJH1d0uO2X5RkSQ9GxPk1rHtV2HoAQC4KnRAdEacknVpw71jXz7+V9Ffllla+2uS0tt0wpBs2DPa7FABYU1mtUK1PNbVrG/PtANKXVbjXJlnABCAPeYV7gwVMAPKQTbhHBFsPAMhGNuE+1WzpzcuzjNwBZCGbcL+ygIkdIQHkIJtwZ+sBADnJJtyvjtwJdwDpyyfcJ6clMS0DIA/ZhHt9qqnhoQFtu7HQolwAqLR8wr2zgKl9nggApC2fcOcEJgAZySbc2XoAQE7yCffGNCN3ANnIItxnWnN6/dJljW7hTRkAecgi3M93Dulg5A4gF1mEOwuYAOQmi3Bn6wEAucki3GsNVqcCyEse4T7ZlC3t3DLc71IAoCeyCPf6VFM7Ng1rw2AW3QWAPMK9Ntlkvh1AVrII9/oU4Q4gL3mE++Q0D1MBZCX5cI8INg0DkJ3kw/0Ply7r8mywaRiArCQf7vOrUxm5A8hIBuHOAiYA+Uk+3Nl6AECOkg93Ng0DkKP0w32yqc3Dg9q8kYOxAeSjULjbPmj7jO0J2w8t0eYu28/bPm375+WWuXosYAKQoxWHs7YHJT0s6S8lnZP0rO2TEfHLrjbbJX1X0sGIeMX2rrUq+FrVWMAEIENFRu53SpqIiLMRMSPphKTDC9p8VtITEfGKJEVErdwyV4+RO4AcFQn33ZJe7bo+17nX7TZJN9v+me3nbN+32C+yfcT2uO3xer2+uoqvUZ1NwwBkqKwHqkOS7pD015I+Jenvbd+2sFFEHI+IsYgYGx0dLemrl/bmzKwazRYLmABkp8grJK9JuqXrek/nXrdzki5ExEVJF20/Jel2Sb8qpcpVmn/Hna0HAGSmyMj9WUn7be+zPSzpHkknF7T5kaSP2h6yvUnShyW9XG6p125+deo2HqgCyMuKI/eIaNl+QNKTkgYlPRYRp20f7Xx+LCJetv1TSS9ImpP0aES8tJaFF8ECJgC5KrSyJyJOSTq14N6xBdfflPTN8kq7fmw9ACBXSa9QrTWmNTRg7djEwdgA8pJ0uNcbTY1s2aiBAfe7FADoqaTDvdbgHXcAeUo73CebPEwFkKWkw52tBwDkKtlwn50LXZhi5A4gT8mG+4WppuZCGmUBE4AMJRvuNbYeAJCxZMP9ygImNg0DkKNkw31+Xxnm3AFkKNlwvzJyH2FaBkCGkg33WqOpm27coBs2DPa7FADouWTDvc7qVAAZSzbcaw3ecQeQr4TDfZpwB5CtJMM9IpiWAZC1JMO90Wxp+vKcdm1ldSqAPCUZ7rVJFjAByFuS4V5n6wEAmUsy3OdXpzJyB5CpJMP96sHYzLkDyFOy4T48NKBtNwz1uxQA6Iskw/3KAiabg7EB5CnJcOcddwC5SzLcWZ0KIHeJhnuTBUwAspZcuDdbs/rDpctMywDIWnLhfn5qRhInMAHIW3LhXptkARMAJBfuV7ceYM4dQL4Khbvtg7bP2J6w/dAy7T5ku2X7M+WVeG1qD
TYNA4AVw932oKSHJR2SdEDSvbYPLNHuG5L+tewir0Wt0ZQt7dw83M8yAKCviozc75Q0ERFnI2JG0glJhxdp92VJP5BUK7G+a1ZvNLVz87CGBpObcQKAwook4G5Jr3Zdn+vcm2d7t6RPS3qkvNJWp96YZsMwANkra3j7LUkPRsTcco1sH7E9bnu8Xq+X9NVvxdYDACAV2TbxNUm3dF3v6dzrNibpRGejrhFJd9tuRcQPuxtFxHFJxyVpbGwsVlv0cmqNpva/Y+ta/GoAqIwi4f6spP2296kd6vdI+mx3g4jYd+Vn249L+peFwd4Lc3Ptg7FZwAQgdyuGe0S0bD8g6UlJg5Iei4jTto92Pj+2xjUW9oc3L6s1F0zLAMheodMsIuKUpFML7i0a6hHxN9df1urMH6/HA1UAmUvqfcHaJAuYAEBKLNyvbj1AuAPIW1LhXps/GJtwB5C3xMJ9WpuHB7V5IwdjA8hbUuFebzS1axsPUwEgqXCvsToVACQlFu7nCXcAkJRYuNdYnQoAkhIK90szLU01WyxgAgAlFO51XoMEgHnJhPv88XqEOwAkFO5sPQAA85IJ93pn0zC2HgCAhMK91mhqaMC6eRMHYwNAUuE+smWjBgbc71IAoO+SCff21gNMyQCAlFC4s4AJAK5KJtzrbD0AAPOSCPfW7JwuXGxqlNWpACApkXC/cHFGESxgAoArkgh3th4AgLdKItxrnQVMjNwBoC2NcJ9k5A4A3ZIId6ZlAOCtkgj3WqOp7Zs2aOPQYL9LAYB1IYlwrzeabBgGAF2SCPdaY5qtBwCgSyLh3uR4PQDoUvlwjwi2HgCABSof7pPTLTVbc7zjDgBdKh/u8ycwEe4AMK9QuNs+aPuM7QnbDy3y+edsv2D7RdtP2769/FIXV+MddwB4mxXD3fagpIclHZJ0QNK9tg8saPZrSR+PiD+V9HVJx8sudClXFjDxQBUArioycr9T0kREnI2IGUknJB3ubhART0fE653LZyTtKbfMpbH1AAC8XZFw3y3p1a7rc517S/mCpJ8s9oHtI7bHbY/X6/XiVS6jPtXUxqEBbbthqJTfBwApKPWBqu1PqB3uDy72eUQcj4ixiBgbHR0t5Ttrk+0FTDYHYwPAFUWGu69JuqXrek/n3lvY/oCkRyUdiogL5ZS3svoUWw8AwEJFRu7PStpve5/tYUn3SDrZ3cD2rZKekPT5iPhV+WUurTbJ6lQAWGjFcI+IlqQHJD0p6WVJ/xQRp20ftX200+yrknZK+q7t522Pr1nFC9QaTfaVAYAFCj2FjIhTkk4tuHes6+cvSvpiuaWtrNma1RtvXmZaBgAWqPQK1fl33Bm5A8BbVDrcayxgAoBFVTrcOV4PABZX6XC/OnIn3AGgW6XDvT45LVvasXm436UAwLpS7XCfamrn5o0aGqx0NwCgdJVOxfYCJqZkAGChSod7fYrj9QBgMZUOd0buALC4yob73Fzo/BRbDwDAYiob7q9fmlFrLth6AAAWUdlwn3/HfRurUwFgocqHOw9UAeDtKhvudVanAsCSKhvutca0JEbuALCY6ob7ZFNbNg5p0zAHYwPAQpUN9/oU77gDwFKqG+6TrE4FgKVUN9zZegAAllTZcK9NTnMCEwAsoZLhfrHZ0sWZWbYeAIAlVDLc54/XY+sBAFhUJcP96tYDhDsALKai4c4CJgBYTiXD/erWAzxQBYDFVDLca42mNgxa22/c0O9SAGBdqma4TzY1smWjBgbc71IAYF2qZLiz9QAALK+S4V6bnNYo8+0AsKRKhvt5th4AgGUVCnfbB22fsT1h+6FFPrftb3c+f8H2B8svta01O6cLF2eYlgGAZawY7rYHJT0s6ZCkA5LutX1gQbNDkvZ3/hyR9EjJdc47PzWjCBYwAcByiozc75Q0ERFnI2JG0glJhxe0OSzpe9H2jKTttt9Zcq2S2HoAAIooEu67Jb3adX2uc+9a25TiyurUXdt4oAoAS+npA1XbR2yP2
x6v1+ur+h3bN23QwT/5I73rJsIdAJZS5ADS1yTd0nW9p3PvWtsoIo5LOi5JY2NjcU2Vdtzx7h264/M7VvOPAkA2iozcn5W03/Y+28OS7pF0ckGbk5Lu67w18xFJb0TE70quFQBQ0Ioj94ho2X5A0pOSBiU9FhGnbR/tfH5M0ilJd0uakHRJ0v1rVzIAYCVFpmUUEafUDvDue8e6fg5JXyq3NADAalVyhSoAYHmEOwAkiHAHgAQR7gCQIMIdABLk9osuffhiuy7pN6v8x0cknS+xnCqgz3mgz3m4nj6/OyJGV2rUt3C/HrbHI2Ks33X0En3OA33OQy/6zLQMACSIcAeABFU13I/3u4A+oM95oM95WPM+V3LOHQCwvKqO3AEAy1jX4b6eDubulQJ9/lynry/aftr27f2os0wr9bmr3Ydst2x/ppf1rYUifbZ9l+3nbZ+2/fNe11i2Av9u32T7x7Z/0elzpXeXtf2Y7Zrtl5b4fG3zKyLW5R+1txf+H0l/LGlY0i8kHVjQ5m5JP5FkSR+R9F/9rrsHff5zSTd3fj6UQ5+72v272ruTfqbfdffg73m7pF9KurVzvavfdfegz38n6Rudn0cl/V7ScL9rv44+f0zSByW9tMTna5pf63nkvq4O5u6RFfscEU9HxOudy2fUPvWqyor8PUvSlyX9QFKtl8WtkSJ9/qykJyLiFUmKiKr3u0ifQ9JW25a0Re1wb/W2zPJExFNq92Epa5pf6znc19XB3D1yrf35gtr/5a+yFftse7ekT0t6pId1raUif8+3SbrZ9s9sP2f7vp5VtzaK9Pk7kt4v6beSXpT0lYiY6015fbGm+VXosA6sP7Y/oXa4f7TftfTAtyQ9GBFz7UFdFoYk3SHpk5JulPSftp+JiF/1t6w19SlJz0v6C0nvkfRvtv8jIib7W1Y1redwL+1g7gop1B/bH5D0qKRDEXGhR7WtlSJ9HpN0ohPsI5Lutt2KiB/2psTSFenzOUkXIuKipIu2n5J0u6SqhnuRPt8v6R+iPSE9YfvXkt4n6b97U2LPrWl+redpmRwP5l6xz7ZvlfSEpM8nMopbsc8RsS8i9kbEXkn/LOlvKxzsUrF/t38k6aO2h2xvkvRhSS/3uM4yFenzK2r/PxXZfoek90o629Mqe2tN82vdjtwjw4O5C/b5q5J2SvpuZyTbigpvulSwz0kp0ueIeNn2TyW9IGlO0qMRsegrdVVQ8O/565Iet/2i2m+QPBgRld0t0vb3Jd0lacT2OUlfk7RB6k1+sUIVABK0nqdlAACrRLgDQIIIdwBIEOEOAAki3AEgQYQ7ACSIcAeABBHuAJCg/wevOynJB+TgSAAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fpr, tpr, thresholds = roc_curve(y_test, y_pre)\n", "import matplotlib.pyplot as plt\n", "plt.plot(fpr, tpr)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on function roc_curve in module sklearn.metrics.ranking:\n", "\n", "roc_curve(y_true, y_score, pos_label=None, sample_weight=None, drop_intermediate=True)\n", " Compute Receiver operating characteristic (ROC)\n", " \n", " 
Note: this implementation is restricted to the binary classification task.\n", " \n", " Read more in the :ref:`User Guide `.\n", " \n", " Parameters\n", " ----------\n", " \n", " y_true : array, shape = [n_samples]\n", " True binary labels in range {0, 1} or {-1, 1}. If labels are not\n", " binary, pos_label should be explicitly given.\n", " \n", " y_score : array, shape = [n_samples]\n", " Target scores, can either be probability estimates of the positive\n", " class, confidence values, or non-thresholded measure of decisions\n", " (as returned by \"decision_function\" on some classifiers).\n", " \n", " pos_label : int or str, default=None\n", " Label considered as positive and others are considered negative.\n", " \n", " sample_weight : array-like of shape = [n_samples], optional\n", " Sample weights.\n", " \n", " drop_intermediate : boolean, optional (default=True)\n", " Whether to drop some suboptimal thresholds which would not appear\n", " on a plotted ROC curve. This is useful in order to create lighter\n", " ROC curves.\n", " \n", " .. versionadded:: 0.17\n", " parameter *drop_intermediate*.\n", " \n", " Returns\n", " -------\n", " fpr : array, shape = [>2]\n", " Increasing false positive rates such that element i is the false\n", " positive rate of predictions with score >= thresholds[i].\n", " \n", " tpr : array, shape = [>2]\n", " Increasing true positive rates such that element i is the true\n", " positive rate of predictions with score >= thresholds[i].\n", " \n", " thresholds : array, shape = [n_thresholds]\n", " Decreasing thresholds on the decision function used to compute\n", " fpr and tpr. 
`thresholds[0]` represents no instances being predicted\n", " and is arbitrarily set to `max(y_score) + 1`.\n", " \n", " See also\n", " --------\n", " roc_auc_score : Compute Area Under the Curve (AUC) from prediction scores\n", " \n", " Notes\n", " -----\n", " Since the thresholds are sorted from low to high values, they\n", " are reversed upon returning them to ensure they correspond to both ``fpr``\n", " and ``tpr``, which are sorted in reversed order during their calculation.\n", " \n", " References\n", " ----------\n", " .. [1] `Wikipedia entry for the Receiver operating characteristic\n", " `_\n", " \n", " \n", " Examples\n", " --------\n", " >>> import numpy as np\n", " >>> from sklearn import metrics\n", " >>> y = np.array([1, 1, 2, 2])\n", " >>> scores = np.array([0.1, 0.4, 0.35, 0.8])\n", " >>> fpr, tpr, thresholds = metrics.roc_curve(y, scores, pos_label=2)\n", " >>> fpr\n", " array([ 0. , 0.5, 0.5, 1. ])\n", " >>> tpr\n", " array([ 0.5, 0.5, 1. , 1. ])\n", " >>> thresholds\n", " array([ 0.8 , 0.4 , 0.35, 0.1 ])\n", "\n" ] } ], "source": [ "help(roc_curve)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import load_boston\n", "data = load_boston()\n", "x = data['data']\n", "y = data['target']" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression\n", "model = LinearRegression().fit(x_train, y_train)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "y_pre = model.predict(x_test)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": { 
"needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "plt.plot(range(len(y_test)), y_test)\n", "plt.plot(range(len(y_pre)), y_pre)\n", "plt.legend(['real', 'predict'])\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_squared_error, r2_score\n", "\n", "# Mean squared error on the held-out test split. It lives in its own cell\n", "# because a notebook only displays the last expression of a cell.\n", "mean_squared_error(y_true=y_test, y_pred=y_pre)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7752573895354288" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Coefficient of determination (R^2) on the same test split.\n", "r2_score(y_true=y_test, y_pred=y_pre)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }